/*	$OpenBSD: rf_reconbuffer.c,v 1.3 2000/08/08 16:07:44 peter Exp $	*/
/*	$NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $	*/

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*****************************************************
 *
 * rf_reconbuffer.c -- Reconstruction buffer manager.
 *
 *****************************************************/

#include "rf_raid.h"
#include "rf_reconbuffer.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugprint.h"
#include "rf_revent.h"
#include "rf_reconutil.h"
#include "rf_nwayxor.h"

/*
 * Debug tracing helpers: each prints through printf() only when
 * rf_reconbufferDebug is non-zero.  The numeric suffix is the number of
 * format arguments.
 *
 * Every macro expands to a single do { } while (0) statement, so a call can
 * be used as the unbraced body of an if/else without the macro's internal
 * "if" capturing the caller's "else".
 */
#define	Dprintf1(s,a)							\
	do {								\
		if (rf_reconbufferDebug)				\
			printf(s, a);					\
	} while (0)
#define	Dprintf2(s,a,b)							\
	do {								\
		if (rf_reconbufferDebug)				\
			printf(s, a, b);				\
	} while (0)
#define	Dprintf3(s,a,b,c)						\
	do {								\
		if (rf_reconbufferDebug)				\
			printf(s, a, b, c);				\
	} while (0)
#define	Dprintf4(s,a,b,c,d)						\
	do {								\
		if (rf_reconbufferDebug)				\
			printf(s, a, b, c, d);				\
	} while (0)
#define	Dprintf5(s,a,b,c,d,e)						\
	do {								\
		if (rf_reconbufferDebug)				\
			printf(s, a, b, c, d, e);			\
	} while (0)

/*****************************************************************************
 *
 * Submit a reconstruction buffer to the manager for XOR.
 * We can only submit a buffer if (1) we can xor into an existing buffer,
 * which means we don't have to acquire a new one, (2) we can acquire a
 * floating recon buffer, or (3) the caller has indicated that we are allowed
 * to keep the submitted buffer.
 *
 * Returns non-zero if and only if we were not able to submit.
 * In this case, we append the current disk ID to the wait list on the
 * indicated RU, so that it will be re-enabled when we acquire a buffer for
 * this RU.
 *
 *****************************************************************************/

/*
 * Readability helper: allocate a callback descriptor into _desc_, tag it
 * with the given row/column, and link it in at the head of
 * _pss_->bufWaitList (despite the name, the new entry becomes the first
 * element of the list).
 */
#define	BUFWAIT_APPEND(_desc_,_pss_,_r_,_c_)				\
do {									\
	_desc_ = rf_AllocCallbackDesc();				\
	(_desc_)->row = (_r_);						\
	(_desc_)->col = (_c_);						\
	(_desc_)->next = (_pss_)->bufWaitList;				\
	(_pss_)->bufWaitList = (_desc_);				\
} while (0)

/*
 * Dispatch table for multi-way XOR: rf_nWayXorFuncs[i] is a pointer to a
 * function that will xor "i" bufs into the accumulating sum.
 *
 * Slot 0 is NULL (there is no zero-buffer XOR) and the last slot is the
 * 9-way routine, so only indices 1 through 9 may be called through this
 * table.
 */
static RF_VoidFuncPtr rf_nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};


/*
 * Layout-independent entry point for submitting a recon buffer: forwards
 * the request, unchanged, to the SubmitReconBuffer routine of the layout
 * switch attached to the buffer's RAID descriptor and returns its result.
 */
int
rf_SubmitReconBuffer(
    RF_ReconBuffer_t	*rbuf,		/* The recon buffer to submit. */
    int			 keep_it,	/*
					 * Non-zero if the manager may keep
					 * this buffer rather than return it.
					 */
    int			 use_committed	/*
					 * Non-zero to take a committed recon
					 * buffer instead of an available one.
					 */
)
{
	return (rbuf->raidPtr->Layout.map->SubmitReconBuffer(rbuf, keep_it,
	    use_committed));
}

/*
 * Basic implementation of recon-buffer submission.
 *
 * With the parity-stripe lock and the rb_mutex held, one of three things
 * happens:
 *
 *  1. A destination buffer already exists for this RU and, counting this
 *     submission, either rf_numBufsToAccumulate buffers are stacked up or
 *     the destination's count plus the stacked buffers reaches
 *     layoutPtr->numDataCol: the buffer is stacked and a multi-way XOR into
 *     the destination is run immediately.
 *  2. Otherwise a buffer "t" is obtained (the submitted one if keep_it,
 *     else a committed or a floating rbuf), it takes over the submitter's
 *     data, and it is installed as the destination buffer or stacked for a
 *     later XOR.
 *  3. No buffer can be obtained: the submitter is queued at the tail of
 *     reconCtrlPtr->bufferWaitList.
 *
 * Returns 0 if the submission was accepted, 1 if the caller has to wait for
 * a buffer (case 3).
 */
int
rf_SubmitReconBufferBasic(
    RF_ReconBuffer_t	*rbuf,		/* The recon buffer to submit. */
    int			 keep_it,	/*
					 * Whether we can keep this buffer
					 * or we have to return it.
					 */
    int			 use_committed	/*
					 * Whether to use a committed or
					 * an available recon buffer.
					 */
)
{
	RF_Raid_t *raidPtr;
	RF_RaidLayout_t *layoutPtr;
	RF_ReconCtrl_t *reconCtrlPtr;
	RF_ReconParityStripeStatus_t *pssPtr;
	/* Temporary rbuf pointers. */
	RF_ReconBuffer_t *targetRbuf, *t = NULL;
	/* Temporary data buffer pointer. */
	caddr_t ta;
	RF_CallbackDesc_t *cb, *p;
	int retcode = 0, created = 0;

	RF_Etimer_t timer;

	/*
	 * Check rbuf before anything is read through it; only then derive
	 * the array, layout and recon-control pointers from it.
	 */
	RF_ASSERT(rbuf);
	raidPtr = rbuf->raidPtr;
	layoutPtr = &raidPtr->Layout;
	reconCtrlPtr = raidPtr->reconControl[rbuf->row];

	/* Makes no sense to have a submission from the failed disk. */
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d"
	    " (failed offset %ld).\n", rbuf->row, rbuf->col,
	    (long) rbuf->parityStripeID, rbuf->which_ru,
	    (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
	    rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/*
				 * If it didn't exist, we wouldn't have gotten
				 * an rbuf for it.
				 */

	/*
	 * Check to see if enough buffers have accumulated to do an XOR. If
	 * so, there's no need to acquire a floating rbuf. Before we can do
	 * any XORing, we must have acquired a destination buffer. If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe. Otherwise, we have to go acquire a
	 * floating rbuf.
	 */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) ||
	     (targetRbuf->count + pssPtr->xorBufCount + 1 ==
	      layoutPtr->numDataCol))) {
		/* Install this buffer. */
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR.\n",
		    rbuf->row, rbuf->col, pssPtr->xorBufCount);
		/* Time the XOR and add it to the array-wide total. */
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			/*
			 * Record the XOR time and the elapsed
			 * return-to-submit time in this column's trace
			 * record, restart its timer, and log the record.
			 */
			raidPtr->recon_tracerecs[rbuf->col].xor_us =
			    RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);
			raidPtr->recon_tracerecs[rbuf->col]
			    .specific.recon.recon_return_to_submit_us +=
			     RF_ETIMER_VAL_US(raidPtr
			      ->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col]
			   .recon_timer);

			rf_LogTraceRec(raidPtr,
			    &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr,
		    layoutPtr->numDataCol);

		/*
		 * If use_committed is on, we _must_ consume a buffer off the
		 * committed list.
		 */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		if (keep_it) {
			/*
			 * The submitted buffer was ours to keep and has now
			 * been XORed in, so drop both locks and free it.
			 */
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row,
			    rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* Set the value of "t", which we'll use as the rbuf from here on. */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {
			/* If a buffer has been committed to us, use it. */

			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				/* Otherwise take one off the floating list. */
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/*
	 * If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct.
	 */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating"
		    " rbuf.\n", rbuf->row, rbuf->col);

		/*
		 * Panic if this waiter brings the count to numCol - 1 while
		 * there are no full recon buffers.
		 */
		raidPtr->procsInBufWait++;
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) &&
		    (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected. Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		/* Append to buf wait list in recon ctrl structure. */
		cb = rf_AllocCallbackDesc();
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {
			/*
			 * Might want to maintain head/tail pointers
			 * here rather than search for end of list.
			 */
			for (p = reconCtrlPtr->bufferWaitList; p->next;
			     p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf.\n", rbuf->row, rbuf->col);
	/*
	 * Account the elapsed return-to-submit time for this column, restart
	 * its timer, and log the trace record.
	 */
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col]
	    .specific.recon.recon_return_to_submit_us +=
	     RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* Initialize the buffer. */
	if (t != rbuf) {
		/*
		 * "t" takes over the submission's identity, except that its
		 * column is set to the failed column.
		 */
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* Swap buffers. */
	}
	/*
	 * The first installation always gets installed as the destination
	 * buffer. Subsequent installations get stacked up to allow for
	 * multi-way XOR.
	 */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		/* Install this buffer. */
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;

	/* The buffer is full if G=2. */
	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr,
	    layoutPtr->numDataCol);

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}

/*
 * XOR every buffer stacked in pssPtr->rbufsForXor into the destination
 * buffer pssPtr->rbuf with a single N-way XOR call, then release all of the
 * stacked buffers except the last one and add the number of buffers folded
 * in to the destination's count.  The stack is left empty.
 *
 * Always returns 0.
 */
int
rf_MultiWayReconXor(
    RF_Raid_t			 *raidPtr,
    RF_ReconParityStripeStatus_t *pssPtr	/*
						 * The pss descriptor for this
						 * parity stripe.
						 */
)
{
	int i, numBufs = pssPtr->xorBufCount;
	int numBytes = rf_RaidAddressToByte(raidPtr,
	    raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
	/*
	 * numBufs indexes the XOR dispatch table directly, so it must also
	 * stay within that table no matter how RF_PS_MAX_BUFS is defined.
	 */
	RF_ASSERT(numBufs < (int) (sizeof(rf_nWayXorFuncs) /
	    sizeof(rf_nWayXorFuncs[0])));
#ifdef	_KERNEL
#if	!defined(__NetBSD__) && !defined(__OpenBSD__)
	/* Yield the processor before doing a big XOR. */
	thread_block();
#endif
#endif	/* _KERNEL */
	/*
	 * XXX
	 *
	 * What if more than 9 bufs ?  The table has no wider routine; the
	 * assertions above are the only guard.
	 */
	rf_nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf,
	    numBytes / sizeof(long));

	/*
	 * Release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place.
	 */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row,
			    rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
			else
				RF_ASSERT(0);
	}
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}


/*
 * Detach and return one full recon buffer, or NULL if none is queued.
 * The priority list is served first; the regular full-buffer list is used
 * only when the priority list is empty.  The returned buffer's next link
 * is cleared.
 *
 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
 */
RF_ReconBuffer_t *
rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
{
	RF_ReconBuffer_t *full;

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	full = reconCtrlPtr->priorityList;
	if (full != NULL) {
		reconCtrlPtr->priorityList = full->next;
	} else {
		full = reconCtrlPtr->fullBufferList;
		if (full != NULL)
			reconCtrlPtr->fullBufferList = full->next;
	}

	/* Whichever list it came from, hand it back unlinked. */
	if (full != NULL)
		full->next = NULL;

	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (full);
}


/*
 * If the destination buffer of this RU (pssPtr->rbuf) has reached a count
 * of numDataCol, link it into reconCtrl->fullBufferList, which is kept in
 * ascending order of failed disk sector offset, detach it from the pss
 * descriptor, and raise an RF_REVENT_BUFREADY event for it.  Otherwise do
 * nothing.  Always returns 0.
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
 */
int
rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
{
	RF_ReconBuffer_t *full = (RF_ReconBuffer_t *) pssPtr->rbuf;
	RF_ReconBuffer_t *prev, *cur;

	/* Count has not reached numDataCol: the buffer is not full yet. */
	if (full->count != numDataCol)
		return (0);

	raidPtr->numFullReconBuffers++;
	Dprintf2("RECON: rbuf for psid %ld ru %d has filled.\n",
	    (long) full->parityStripeID, full->which_ru);

	prev = reconCtrl->fullBufferList;
	if (prev == NULL ||
	    full->failedDiskSectorOffset < prev->failedDiskSectorOffset) {
		/* Empty list, or smaller than the current head. */
		Dprintf2("RECON: rbuf for psid %ld ru %d is head of"
		    " list.\n", (long) full->parityStripeID,
		    full->which_ru);
		full->next = reconCtrl->fullBufferList;
		reconCtrl->fullBufferList = full;
	} else {
		/*
		 * Walk to the first entry whose offset is not smaller than
		 * ours and splice in just ahead of it.
		 */
		cur = prev->next;
		while (cur != NULL && cur->failedDiskSectorOffset <
		    full->failedDiskSectorOffset) {
			prev = cur;
			cur = cur->next;
		}
		full->next = cur;
		prev->next = full;
		Dprintf2("RECON: rbuf for psid %ld ru %d is in list.\n",
		    (long) full->parityStripeID, full->which_ru);
	}
#if 0
	pssPtr->writeRbuf = pssPtr->rbuf;	/*
						 * DEBUG ONLY: We like
						 * to be able to find
						 * this rbuf while it's
						 * awaiting write.
						 */
#else
	/* Remember which pss descriptor this buffer came from. */
	full->pssPtr = pssPtr;
#endif
	pssPtr->rbuf = NULL;
	rf_CauseReconEvent(raidPtr, full->row, full->col, NULL,
	    RF_REVENT_BUFREADY);

	return (0);
}


/*
 * Give a floating recon buffer back to the pool for the given row.
 *
 * If nobody is on the buffer wait list, the buffer goes onto the floating
 * list.  Otherwise it goes onto the committed list, and the first waiter is
 * dequeued and sent an RF_REVENT_BUFCLEAR event.
 *
 * Assumes the rb_mutex is LOCKED at entry.
 */
void
rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row,
    RF_ReconBuffer_t *rbuf)
{
	RF_ReconCtrl_t *ctrl = raidPtr->reconControl[row];
	RF_CallbackDesc_t *waiter;

	Dprintf2("RECON: releasing rbuf for psid %ld ru %d.\n",
	    (long) rbuf->parityStripeID, rbuf->which_ru);

	/* Nobody waiting: simply return the buffer to the floating list. */
	if (!ctrl->bufferWaitList) {
		rbuf->next = ctrl->floatingRbufs;
		ctrl->floatingRbufs = rbuf;
		return;
	}

	/*
	 * Someone is waiting on buffers, so wake one of them up. They will
	 * subsequently wake up anyone else waiting on their RU.
	 */
	rbuf->next = ctrl->committedRbufs;
	ctrl->committedRbufs = rbuf;

	waiter = ctrl->bufferWaitList;
	ctrl->bufferWaitList = waiter->next;

	/* arg==1 => We've committed a buffer. */
	rf_CauseReconEvent(raidPtr, waiter->row, waiter->col, (void *) 1,
	    RF_REVENT_BUFCLEAR);
	rf_FreeCallbackDesc(waiter);
	raidPtr->procsInBufWait--;
}


/*
 * Wake every disk queued on pssPtr->bufWaitList: clear the RU's
 * RF_PSS_BUFFERWAIT flag, send each waiter an RF_REVENT_BUFCLEAR event,
 * free its callback descriptor, and leave the list empty.
 *
 * Assumes the rb_mutex is LOCKED at entry.
 */
void
rf_ReleaseBufferWaiters(
    RF_Raid_t			 *raidPtr,
    RF_ReconParityStripeStatus_t *pssPtr
)
{
	RF_CallbackDesc_t *waiter, *following;

	Dprintf2("RECON: releasing buf waiters for psid %ld ru %d.\n",
	    (long) pssPtr->parityStripeID, pssPtr->which_ru);
	pssPtr->flags &= ~RF_PSS_BUFFERWAIT;

	for (waiter = pssPtr->bufWaitList; waiter != NULL;
	     waiter = following) {
		/* Save the successor: the descriptor is freed below. */
		following = waiter->next;
		waiter->next = NULL;
		/* arg==0 => We haven't committed a buffer. */
		rf_CauseReconEvent(raidPtr, waiter->row, waiter->col,
		    (void *) 0, RF_REVENT_BUFCLEAR);
		rf_FreeCallbackDesc(waiter);
	}
	pssPtr->bufWaitList = NULL;
}


/*
 * When reconstruction is forced on an RU, there may be some disks waiting to
 * acquire a buffer for that RU. Since a new buffer is allocated as part of
 * the forced-reconstruction process, such a disk no longer has to wait.
 *
 * This scans rcPtr->bufferWaitList for the first entry whose parity stripe
 * ID and RU match those of rbuf, unlinks it, sends it an RF_REVENT_BUFREADY
 * event, and frees its descriptor.  At most one waiter is released; if no
 * entry matches, the list is left untouched.
 *
 * Assumes the rb_mutex is LOCKED at entry.
 */
void
rf_ReleaseBufferWaiter(RF_ReconCtrl_t *rcPtr, RF_ReconBuffer_t *rbuf)
{
	RF_CallbackDesc_t **linkp, *cb;

	/* linkp always addresses the pointer that leads to cb. */
	for (linkp = &rcPtr->bufferWaitList; (cb = *linkp) != NULL;
	     linkp = &cb->next) {
		if ((cb->callbackArg.v != rbuf->parityStripeID) ||
		    (cb->callbackArg2.v != rbuf->which_ru))
			continue;

		Dprintf2("RECON: Dropping row %d col %d from buffer"
		    " wait list.\n", cb->row, cb->col);
		*linkp = cb->next;

		/* arg==0 => No committed buffer. */
		rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr,
		    cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY);
		rf_FreeCallbackDesc(cb);
		return;
	}
}
